import os
import pandas as pd
from astropy.table import Table

keep = [1,5,6,9,13,14,17,21,22]
base_path = r"C:\Users\spenc\Dropbox\RandomForests\Survey results Laia\Tables\CORRECTED\all"
paths = [base_path + fr"\Table{ii}.tex" for ii in keep]

path_not_vote = r"C:\Users\spenc\Dropbox\RandomForests\Survey results Laia\Tables\CORRECTED\did NOT vote in referendum"
paths_not_vote = [path_not_vote + '\\' + file_name for file_name in  os.listdir(path_not_vote)]

path_vote_no = r"C:\Users\spenc\Dropbox\RandomForests\Survey results Laia\Tables\CORRECTED\voted NO in referendum"
paths_vote_no = [path_vote_no + '\\' + file_name for file_name in  os.listdir(path_vote_no)]


path = paths_vote_no[0]

def read_tab(path):
    """
    read in the path of .tex table and output pandas table
    """
    with open(path, 'r') as f:
        table = f.read().splitlines()
        table = [line for line in table if not line.startswith('\\multicolumn')]
        table = '\n'.join(table)

    with open(path, 'w') as f:
        f.write(table)

    # read in the data
    tab = Table.read(path)
    # conver to Pandas
    df = tab.to_pandas()

    df.columns = df.iloc[0]
    df = df.reindex(df.index.drop(0))
    df.columns = ['var', df.columns[1].split('}')[-2][1:], df.columns[2].split('}')[-2][1:]]
    df['SE_1'] = 0
    df['SE_2'] = 0

    pd.isnull(df['var'].iloc[2])

    # switch over the SE
    for row_num in range(0, df.shape[0]):
        if pd.isnull(df['var'].iloc[row_num]):
            df.iloc[row_num - 1, 3] = df.iloc[row_num, 1]
            df.iloc[row_num - 1, 4] = df.iloc[row_num, 2]

    # drop the old SE rows
    list_nans = [ii for ii in df.index if pd.isnull(df.loc[ii, 'var'])]
    df = df.drop(list_nans)

    # replace the wacky characters
    df.columns = pd.Series(df.columns).str.replace(r'\\_', '_')
    df = df.replace({r'\\_':'_', r'\(':'', r'\)':'', r'sym{\*}':'', r'sym{\*\*}':'', r'sym{\+}':'', r'\\':''}, regex=True)

    # drop the first row if it's the same as the column name
    if df['var'].iloc[0].startswith(df.columns[1]):
        df = df.iloc[1:]

    return df

tabs = {elem:read_tab(paths[count]) for count, elem in enumerate(keep)}

for k,v in tabs.items():
    v.to_csv(base_path+r'\Table{}_fixed.csv'.format(k), index=False)

nums_not_vote = [path.split('\\')[-1].split('.tex')[0] for path in paths_not_vote]
for ii in range(len(paths_not_vote)):
    pd_tab = read_tab(paths_not_vote[ii])
    pd_tab.to_csv(path_not_vote + f'\\{nums_not_vote[ii]}_fixed.csv', index=False)

nums_vote_no = [path.split('\\')[-1].split('.tex')[0] for path in paths_vote_no]
for ii in range(len(paths_vote_no)):
    pd_tab = read_tab(paths_vote_no[ii])
    pd_tab.to_csv(path_vote_no + f'\\{nums_vote_no[ii]}_fixed.csv', index=False)
